
* +++++++++++++++++++++++++++++
* FIGURE A8: 
* FACEBOOK VS. SOEP
* +++++++++++++++++++++++++++++

* ---- SOEP refugee sample: load, attach weights, derive raw variables ----
* Person-level file; every record must find its weight record in phrf.dta
* (unmatched master rows raise an error, weight-only rows are discarded).
use "${data_raw}/soep/bgp_refugees.dta", clear
merge 1:1 pid using "${data_raw}/soep/phrf.dta", ///
	assert(using match) keep(match) nogenerate

* number of friends: copy bgpr21401, leaving the code -2 as missing
* (presumably a SOEP "does not apply"/non-response code -- confirm in codebook)
gen num_friends = bgpr21401 if bgpr21401 != -2

* year of arrival in Germany: values before 2002 are treated as missing
gen year_came_to_de = bgpr_l_3401 if bgpr_l_3401 >= 2002

* country of birth (raw code, used later to select the sample)
gen country_birth = bgpr_l_0201

* winsorized number of friends: clamp to the weighted 1st and 95th
* percentiles of num_friends; missing values are left untouched
summarize num_friends [w=bgphrf], detail
local wins_lo `r(p1)'
local wins_hi `r(p95)'
gen num_friends_wins = num_friends
replace num_friends_wins = `wins_lo' if num_friends_wins < `wins_lo'
replace num_friends_wins = `wins_hi' if num_friends_wins > `wins_hi' & ///
	num_friends_wins != .

* create age buckets
* age = survey year minus bgpr_l_0103 (presumably year of birth -- confirm);
* negative values of bgpr_l_0103 are treated as missing
gen age = syear-bgpr_l_0103
replace age = . if bgpr_l_0103 <0
gen age_bucket = .
replace age_bucket = 1 if inrange(age,18,24)
replace age_bucket = 2 if inrange(age,25,34)
replace age_bucket = 3 if inrange(age,35,44)
replace age_bucket = 4 if inrange(age,45,54)
replace age_bucket = 5 if age >= 55 & age !=.

* focus on people who came in 2015/16 and are from Syria
keep if inrange(year_came_to_de, 2015, 2016) & country_birth == 19

* months and quarters in Germany
* months = bgprmonin - bgpr_l_3402; values of bgpr_l_3402 below 1 are invalid.
* NOTE(review): adding 12 for 2015 arrivals presumes bgprmonin is a month in
* 2016 and bgpr_l_3402 the arrival month -- confirm against the codebook.
gen months_in_de = bgprmonin - bgpr_l_3402
replace months_in_de =. if bgpr_l_3402 <1
replace months_in_de = months_in_de + 12 if year_came_to_de == 2015

* quarter 1 covers months 0-3; quarters 2-8 cover three months each
* (4-6, 7-9, ..., 22-24); anything outside 0-24 stays missing
gen quarters_in_de = 1 if inrange(months_in_de, 0, 3)
forvalues q = 2/8 {
	replace quarters_in_de = `q' if inrange(months_in_de, 3*`q' - 2, 3*`q')
}

* bundesland: negative codes are pooled into a residual category 17
gen state = bgpr_l_5501_nuts1
replace state =17 if state < 0
* diagnostic: weighted summary of the winsorized outcome in the selected sample
su num_friends_wins [w=bgphrf]

* female
* Leave female missing when the source variable is missing or non-positive
* (the same "negative = missing" handling used for age and state above).
* A bare `gen female = bgpr_l_0101 == 2` evaluates to 0 in those cases, which
* would make the female != . check below vacuous.
* NOTE(review): assumes 2 codes female and valid codes are positive -- confirm.
gen female = bgpr_l_0101 == 2 if bgpr_l_0101 > 0 & !missing(bgpr_l_0101)

* keep people for whom relevant variables non-missing
keep if num_friends_wins !=. & female !=. & age_bucket !=. & quarters_in_de !=.

* ---- SOEP cell averages, saved to temporary files for later merging ----
* count_soep is a constant; its (rawsum) gives the unweighted number of
* respondents per cell, while the mean of num_friends_wins uses bgphrf weights.
gen count_soep = 1
tempfile soep_state soep_state_age

* state level averages (data in memory are restored afterwards)
preserve
collapse (mean) num_friends_wins (rawsum) count_soep [w=bgphrf], by(state)
save `soep_state'
restore

* state by age avg (this collapse replaces the data in memory)
collapse (mean) num_friends_wins (rawsum) count_soep [w=bgphrf], ///
	by(state age_bucket)
save `soep_state_age'

* correlation: all age buckets
* collapse fb data to state level: mean of n_frnd_nat_lcl_sy_avg_re weighted by
* n_frnd_nat_lcl_sy_n, plus the unweighted total of n_frnd_nat_lcl_sy_n
use "${data_derived}/regional_analysis_data.dta",clear

collapse (mean) n_frnd_nat_lcl_sy_avg_re ///
	(rawsum) n_frnd_nat_lcl_sy_n ///
	[weight=n_frnd_nat_lcl_sy_n], by(state)
* The collapsed variables keep their original names. Appending a suffix here
* would force the corr command below to rely on variable-name abbreviation
* (set varabbrev on) and on the user-written renvars command; nothing
* downstream needs renamed variables because the data are reloaded afterwards.

* convert the string state identifier to a numeric code so it can be merged
* NOTE(review): encode numbers the states 1..K in sorted order of the string;
* this must coincide with the SOEP state codes in `soep_state' -- confirm.
encode state, gen(state_num)
drop state
ren state_num state

* correlate with SOEP
* every FB state must match; SOEP-only states (e.g. residual code) are dropped
merge 1:1 state using `soep_state', assert(2 3) nogen keep(3)

corr n_frnd_nat_lcl_sy_avg_re num_friends_wins [w=n_frnd_nat_lcl_sy_n]
* formatted correlation, reused in the figure note further below
local corr_all : di %04.2f `r(rho)'
	
* ---- correlation + figure at the state x age-bucket level ----
* The FB file holds one pair of variables per age bucket (suffix a1..a5).
* Each pair is collapsed to state level separately because the weight
* differs by bucket; the five results are stored as temporary files.
use "${data_derived}/regional_analysis_data.dta",clear

forvalues k = 1/5 {
	preserve
	collapse (mean) n_frnd_nat_lcl_sy_avg_rea`k' ///
		(rawsum) n_frnd_nat_lcl_sy_na`k' ///
		[weight=n_frnd_nat_lcl_sy_na`k'], by(state)

	tempfile fb_a`k'
	save `fb_a`k''
	restore
}

* stitch the five state-level files together (all states must match)
use `fb_a1', clear
foreach k of numlist 2/5 {
	merge 1:1 state using `fb_a`k'', assert(3) nogen
}

* wide -> long: one row per state and age bucket; the suffix "a1".."a5"
* lands in age_bucket_str and its digit becomes the numeric bucket id
reshape long n_frnd_nat_lcl_sy_avg_re n_frnd_nat_lcl_sy_n, ///
	i(state) j(age_bucket_str) string
gen byte age_bucket = real(substr(age_bucket_str, 2, 1))
drop age_bucket_str

* numeric state code for merging
* NOTE(review): relies on encode's sorted ordering matching the SOEP state
* codes -- confirm.
encode state, gen(state_num)
drop state
ren state_num state

* correlate with SOEP and make figure (only matched state-age cells are kept)
merge 1:1 state age_bucket using `soep_state_age', assert(1 2 3) nogen keep(3)

corr n_frnd_nat_lcl_sy_avg_re num_friends_wins [w=n_frnd_nat_lcl_sy_n]
local corr_by_age : di %04.2f `r(rho)'

* weighted scatter with linear fit; the note reports both correlations
* (corr_all is set earlier in this do-file)
twoway ///
	(scatter n_frnd_nat_lcl_sy_avg_re num_friends_wins [w=n_frnd_nat_lcl_sy_n], ///
		mcolor("black") msize(vsmall)) ///
	(lfit n_frnd_nat_lcl_sy_avg_re num_friends_wins [w=n_frnd_nat_lcl_sy_n], ///
		lcolor("blue")), ///
	ylabel(0(4)12, labsize(large) nogrid) ///
	xlabel(0(4)12, labsize(large)) ///
	ytitle("FB: N Local Native Friends", size(large)) ///
	xtitle("SOEP: N German Acquaintances", size(large)) ///
	graphregion(color(white)) legend(off) ///
	note("Correlation (by age) = `corr_by_age'" ///
	"Correlation (all) = `corr_all'")
graph export "${output}/integration_vs_num_friends_by_age.png", replace width(3000)
